Functional Enrichment Analysis | Fisher test by module.
Level 3 of clusters. We keep only the modules with more than 30 nodes for downstream analysis.
# Network clusters
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/"
fea_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/sn_dlpfc/2nd_pass/FEA/heatmap/"
macro_type = params$cell_type #macro_structure. It can be cell_type, metabolites, region of the brain.
message(paste0("Cluster: ", macro_type))## Cluster: ext
modules_file = read.table(paste0(net_dir, macro_type, "/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
net_output = modules_file %>% filter(! cluster_lv3 %in% as.integer(too_small)) # the clusters with at least 30 nodes
createDT(net_output)Universe tested:
length(net_output$ensembl)## [1] 16833
Pvalue adjusted by Bonferroni
# Gene lists. These files must contain 2 columns, with header: 1) symbol, 2) ensembl
path_to_files = "/pastel/resources/gene_lists/mynd_lists/"
geneList_file_names = c("antigen_presentation_GO.txt",
"apoptosis_GO.txt",
"autophagy_GO.txt",
"cell_proliferation_GO.txt",
"DNA_metabolic_GO.txt",
"DNA_methylation_GO.txt",
"DNA_repair_GO.txt",
"DNA_replication_GO.txt",
"endosomal_transport_GO.txt",
"exocytosis_GO.txt",
"glucose_metabolism_GO.txt",
"IFNb_response_GO.txt",
"IFNg_response_GO.txt",
"inflammatory_resp_GO.txt",
"lipid_metabolism_GO.txt",
"lysosome_GO.txt",
"macroautophagy_GO.txt",
"mitochondria_GO.txt",
"neutrophil_activation_GO.txt",
"phagocytosis_GO.txt",
"protein_ubiquitinization_GO.txt",
"proteolysis_GO.txt",
"response_cytokine_GO.txt",
"ribosome_GO.txt",
"RNA_splicing_GO.txt",
"translation_GO.txt",
"vesicle_med_transp_GO.txt",
"viral_response_GO.txt")
genes_universe = net_output$ensembl
cluster_lables = net_output$cluster_lv3
geneAnnotation_lists = parseGeneLists(path_to_files, geneList_file_names, genes_universe)
gene_cluster_enrich = moduleEnrich(genes_universe, cluster_lables, geneAnnotation_lists)
p = plot_module_enrichment_heatmap(cluster_lables, gene_cluster_enrich, plot_title = "Module enrichment", filter_pval = 1, bonf.adj.pval = T)
pdf(file = paste0(fea_dir, "h_fisher_",macro_type,"_cl3_GO_bonf.pdf"), width = 16, height = 17)
p
dev.off()## png
## 2
pPvalue adjusted by Bonferroni.
Cell type markers from Johnson et al, 2022 , plus m109 from Mostafavi et al, 2018. GWAS AD from Bellenguez et al, 2022. Plaque-induced gene list - PIG from Chen et al, 2020.
# Gene lists. These files must contain 2 columns, with header: 1) symbol, 2) ensembl
path_to_files = "/pastel/resources/gene_lists/celltype_markers/"
geneList_file_names = c("astrocytes.txt",
"microglia.txt",
"neuron.txt",
"oligodendrocytes.txt",
"endothelia.txt",
"m109_mostafavi.txt",
"GWAS_AD_Bel2022.txt",
"PIG_orthologs.txt")
genes_universe = net_output$ensembl
cluster_lables = net_output$cluster_lv3
geneAnnotation_lists = parseGeneLists(path_to_files, geneList_file_names, genes_universe)
gene_cluster_enrich = moduleEnrich(genes_universe, cluster_lables, geneAnnotation_lists)
p = plot_module_enrichment_heatmap(cluster_lables, gene_cluster_enrich, plot_title = "Module enrichment", filter_pval = 1, bonf.adj.pval = T)
pdf(file = paste0(fea_dir, "h_fisher_",macro_type,"_cl3_cellType_bonf.pdf"), width = 8, height = 15)
p
dev.off()## png
## 2
psessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ComplexHeatmap_2.11.1 circlize_0.4.14 forcats_0.5.1 stringr_1.4.0 purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [8] tibble_3.1.6 tidyverse_1.3.1 dplyr_1.0.8 performance_0.10.0 lmerTest_3.1-3 lme4_1.1-28 Matrix_1.3-4
## [15] gplots_3.1.1 broom_0.7.12 ggplot2_3.3.5 ggeasy_0.1.3
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-153 matrixStats_0.61.0 bitops_1.0-7 fs_1.5.2 lubridate_1.8.0 doParallel_1.0.17 insight_0.18.5 RColorBrewer_1.1-2
## [9] httr_1.4.2 numDeriv_2016.8-1.1 bslib_0.3.1 tools_4.1.2 backports_1.4.1 utf8_1.2.2 R6_2.5.1 DT_0.20
## [17] KernSmooth_2.23-20 vipor_0.4.5 BiocGenerics_0.40.0 DBI_1.1.2 colorspace_2.0-3 GetoptLong_1.0.5 withr_2.4.3 tidyselect_1.1.2
## [25] compiler_4.1.2 cli_3.2.0 rvest_1.0.2 xml2_1.3.3 sass_0.4.0 caTools_1.18.2 scales_1.1.1 digest_0.6.29
## [33] minqa_1.2.4 rmarkdown_2.11 pkgconfig_2.0.3 htmltools_0.5.2 highr_0.9 dbplyr_2.1.1 fastmap_1.1.0 htmlwidgets_1.5.4
## [41] rlang_1.0.1 GlobalOptions_0.1.2 readxl_1.3.1 rstudioapi_0.13 jquerylib_0.1.4 shape_1.4.6 generics_0.1.2 jsonlite_1.7.3
## [49] crosstalk_1.2.0 gtools_3.9.2 magrittr_2.0.2 S4Vectors_0.32.3 Rcpp_1.0.8 ggbeeswarm_0.6.0 munsell_0.5.0 fansi_1.0.2
## [57] lifecycle_1.0.1 stringi_1.7.6 yaml_2.3.5 MASS_7.3-54 parallel_4.1.2 crayon_1.5.0 lattice_0.20-45 haven_2.4.3
## [65] splines_4.1.2 hms_1.1.1 knitr_1.37 pillar_1.7.0 rjson_0.2.21 boot_1.3-28 stats4_4.1.2 codetools_0.2-18
## [73] reprex_2.0.1 glue_1.6.1 evaluate_0.15 modelr_0.1.8 png_0.1-7 foreach_1.5.2 vctrs_0.3.8 nloptr_2.0.0
## [81] tzdb_0.2.0 cellranger_1.1.0 gtable_0.3.0 clue_0.3-60 assertthat_0.2.1 xfun_0.29 pheatmap_1.0.12 iterators_1.0.14
## [89] IRanges_2.28.0 beeswarm_0.4.0 cluster_2.1.2 ellipsis_0.3.2